/*Matching SNI codes of the workplace and the firm-level data for obtaining the primary SNI codes*/
* Start from an empty session (data, programs, saved results).
clear all

* Uncomment and adapt to run from the folder holding the replication datasets:
*cd "/RFS_replication_package/Martinsson_et_al_datasets"

* Load the installation-level (workplace) microdata.
use "arbetstallen_w_SNI20190526.dta", clear

/*In some cases a company switched SNI codes even though it had only one plant.
  Example: 278278 reports 02010 up to 2014, and 69201 in 2014 and in 2015.
  We unify these codes, too.

  Rule: after sorting by bidnr and year, whenever an observation has the same
  bidnr and the same cfarnr_lopnr as the observation directly before it but a
  different sni2007, it takes over the previous observation's sni2007.

  Implementation notes:
  - replace works through the observations in order, so sni2007[_n-1] already
    holds the updated value of the previous row. The earliest code of a run of
    consecutive rows therefore cascades through the whole run.
  - "_n > 1" protects the first observation, for which [_n-1] is missing.
  - A single replace needs no helper id variable and no loop bound, so it also
    works for empty data and for datasets too large for a float row counter.*/

sort bidnr year, stable

quietly replace sni2007 = sni2007[_n-1]          ///
	if _n > 1                                    ///
	& sni2007      != sni2007[_n-1]              ///
	& bidnr        == bidnr[_n-1]                ///
	& cfarnr_lopnr == cfarnr_lopnr[_n-1]

/*Determine one primary SNI code per firm (bidnr).

  Two candidate codes are computed, each constant within bidnr:
  - sni2007_final      : the code with the highest number of observations,
                         counted within (sni2007, bidnr, cfarnr_lopnr).
  - sni2007_final_empl : the code with the highest employment, i.e. the largest
                         sum of ast_antalpers within (sni2007, bidnr).
  If several codes tie for the maximum, max() picks the numerically largest one.

  Only sni2007_final_empl is kept when the output file is written further down;
  the other variables created here are intermediate.

  As stated by the original authors, the result is later matched with the
  firm-level data because the arbetsstallen data does not cover all firm-years
  (that step is not part of this section).*/

/*2-digit SNI prefix; observations without any SNI code are removed*/
generate sni2007_2d = substr(sni2007, 1, 2)
drop if sni2007_2d == ""

/*Number of distinct 2-digit prefixes per bidnr: flag the first row of each
  (bidnr, prefix) cell, accumulate the flags, then spread the total*/
bysort bidnr sni2007_2d: generate nvals = (_n == 1)
by bidnr: replace nvals = sum(nvals)
by bidnr: replace nvals = nvals[_N]

/*Candidate 1: most frequent code*/
egen nvals1  = count(sni2007), by(sni2007 bidnr cfarnr_lopnr)
egen max_sni = max(nvals1), by(bidnr)
generate sni_max = sni2007 if nvals1 == max_sni
destring sni_max, replace
egen sni2007_final = max(sni_max), by(bidnr)

/*Candidate 2: code with the most employees*/
egen empl_nval    = total(ast_antalpers), by(sni2007 bidnr)
egen max_sni_empl = max(empl_nval), by(bidnr)
generate sni_max_empl = sni2007 if empl_nval == max_sni_empl
destring sni_max_empl, replace
egen sni2007_final_empl = max(sni_max_empl), by(bidnr)

/*Keep only the identifiers and the employment-based primary code, and discard
  rows where either the code or bidnr is missing*/
keep bidnr year sni2007_final_empl
drop if sni2007_final_empl == . | bidnr == .
rename sni2007_final_empl sni2007

/*Convert back to string. The numeric conversion removed a leading zero, so
  4-character codes get it back*/
tostring sni2007, replace
replace sni2007 = "0" + sni2007 if strlen(sni2007) == 4

/*One row per bidnr and year: keep the first observation of each pair*/
sort bidnr year, stable
by bidnr year: keep if _n == 1

/*Primary SNI (NACE) code, one row per bidnr and year*/
save arbetstallen_SNI_codes.dta, replace


